import os
import json
import torch
from openai import AzureOpenAI
from tqdm import tqdm
import sys
import re
import base64
import cv2
from PIL import Image
from io import BytesIO
import random
import pandas as pd
import argparse
# CLI arguments
# --start/--end select the slice of test samples to process; --end is treated
# as inclusive by prepare_and_run_evaluation and None means "through the last
# row". Parsing happens at import time because `args` is read as a module
# global by the evaluation code below.
parser = argparse.ArgumentParser()
parser.add_argument("--start", type=int, default=0, help="Starting sample index")
parser.add_argument("--end", type=int, default=None, help="Ending sample index")
parser.add_argument("--output_dir", type=str, default=".", help="Directory for result JSONs")
parser.add_argument("--similarity_json", type=str, default=None, help="Path to similarity JSON file (optional)")
args = parser.parse_args()
# Similarity data
# As consumed later in this file: a mapping from the sample index (as a
# string) to a dict whose "similar_images" list holds entries with an "image"
# file name and a "mean_score". An empty mapping disables similarity-based
# retrieval, leaving only the random fallback.
if args.similarity_json:
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # locale encoding (which differs on e.g. Windows).
    with open(args.similarity_json, "r", encoding="utf-8") as f:
        SIMILARITY_DATA = json.load(f)
else:
    SIMILARITY_DATA = {}

def frame_to_data_url(frame_bgr):
    """Encode an OpenCV frame as a base64 JPEG ``data:`` URL.

    The frame is converted from BGR to RGB, resized to a fixed 256x256
    (aspect ratio is not preserved) with LANCZOS resampling, JPEG-encoded in
    memory and returned as ``data:image/jpeg;base64,<payload>``.

    Args:
        frame_bgr: Image array in OpenCV's BGR channel order.

    Returns:
        The data URL as a ``str``.
    """
    # OpenCV stores channels as BGR, whereas PIL expects RGB.
    rgb_pixels = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    thumbnail = Image.fromarray(rgb_pixels).resize((256, 256), Image.LANCZOS)

    # Serialize to JPEG without touching the filesystem.
    jpeg_buffer = BytesIO()
    thumbnail.save(jpeg_buffer, format="JPEG")

    payload = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')
    return "data:image/jpeg;base64," + payload

# Login to Hugging Face
# os.system('huggingface-cli login --token "hf_your_token_here"')

# Azure OpenAI Configuration
# The key and endpoint are taken from the AZURE_OPENAI_API_KEY and
# AZURE_OPENAI_ENDPOINT environment variables when they are set, so real
# credentials do not have to be written into this file. The placeholder
# literals remain as the defaults, so editing them in place still works when
# the variables are absent.
api_version = "2024-02-15-preview"
config_dict = {
    'api_key': os.environ.get("AZURE_OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
    'api_version': api_version,
    'azure_endpoint': os.environ.get("AZURE_OPENAI_ENDPOINT", "https://your-azure-openai-endpoint/"),
}

# Single baseline system prompt, passed as the system message for every
# sample. It pins the reply format to a "Reason: ..." line followed by an
# "Answer: ..." line carrying the 0-10 score (decimals allowed), which is what
# the score extraction in this file relies on.
# NOTE(review): the text promises "5 example" screenshots, but the evaluation
# loop may send fewer when not enough example images can be loaded - confirm
# this mismatch is acceptable.
baseline_system_prompt = """You are an expert evaluator of website aesthetics and design. Your task is to assess how much people would like a website based on its visual design, layout, color scheme, typography, and overall aesthetic appeal.

You will be shown 5 example website screenshots with their likeability scores (on a 0-10 scale), followed by a new website screenshot that you need to evaluate.
You can provide precise scores including decimal values (e.g., 7.5, 8.2) to better reflect your nuanced judgment.
Return your response in this exact format:
Reason: [Explain what aspects of the website design make it appealing or unappealing, considering layout, colors, typography, and overall aesthetic quality]
Answer: [0-10] ← You must include this numerical score."""

def get_json_data_generate(sys_prompt, user_prompt, images):
    """Build the chat ``messages`` payload for one few-shot scoring request.

    Args:
        sys_prompt: Text of the system message.
        user_prompt: Text placed first in the user message.
        images: List of ``(data_url, score)`` tuples. Every entry except the
            last is an example image; the last entry is the image to be
            scored and its score is ignored.

    Returns:
        ``{"messages": [system_message, user_message]}`` where the user
        message content is the prompt text followed by one ``image_url`` part
        per image. Example images use ``detail="low"`` and carry a ``"score"``
        string; the target image uses ``detail="high"`` and has no score.
    """
    user_content = [{"type": "text", "text": user_prompt}]
    target_idx = len(images) - 1
    for idx, (img_url, score) in enumerate(images):
        if idx < target_idx:
            # Example images. A missing score must not abort the whole request:
            # formatting None with ".2f" raises TypeError, so use "N/A".
            # NOTE(review): "score" is not a standard field of an image_url
            # content part - confirm the endpoint accepts or ignores it.
            user_content.append({
                "type": "image_url",
                "image_url": {"url": img_url, "detail": "low"},
                "score": f"{score:.2f}" if score is not None else "N/A",
            })
        else:
            # The image to be scored
            user_content.append({
                "type": "image_url",
                "image_url": {"url": img_url, "detail": "high"},
            })
    return {
        "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_content},
        ]
    }

def verbalize(prompt, sys_prompt, images):
    """Send one few-shot scoring request to Azure OpenAI and return the reply text.

    Args:
        prompt: User prompt text.
        sys_prompt: System prompt text.
        images: List of ``(data_url, score)`` tuples as expected by
            ``get_json_data_generate`` (examples first, target image last).

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the response has no choices or the first choice
            carries no text content.
    """
    json_data = get_json_data_generate(sys_prompt, prompt, images)
    client = AzureOpenAI(
        api_key=config_dict['api_key'],
        api_version=config_dict['api_version'],
        azure_endpoint=config_dict['azure_endpoint'],
    )
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=json_data["messages"],
        max_tokens=350,
        temperature=0.85,
        n=1
    )
    if not response.choices:
        raise RuntimeError("Azure OpenAI response contained no choices")
    choice = response.choices[0]
    content = choice.message.content
    if content is None:
        # The content field is optional; fail with an explicit message instead
        # of an AttributeError on ``None.strip()``.
        raise RuntimeError(
            f"Azure OpenAI returned no text content (finish_reason={choice.finish_reason!r})"
        )
    return content.strip()

import pandas as pd
import re

# Number directly following an "Answer:" label; brackets, asterisks and
# spaces between the label and the number are tolerated.
_ANSWER_SCORE_RE = re.compile(r'answer\s*:[^\w\n]*(\d+(?:\.\d+)?)', re.IGNORECASE)
# Any standalone integer or decimal. A number preceded by "/" is excluded so
# the denominator of "8/10" is not mistaken for the score.
_ANY_NUMBER_RE = re.compile(r'(?<![\w./])\d+(?:\.\d+)?(?!\w)')


def extract_score_from_response(response_text):
    """Extract the numerical score from the response text.

    The number after the last ``Answer:`` label is preferred. If no such
    label is followed by a number, the last standalone number in the text is
    used. Decimal values such as ``7.5`` are kept whole.

    Args:
        response_text: Model reply; ``None`` or an empty string is allowed.

    Returns:
        The score as a ``float``, or ``None`` when no number is found.
    """
    if not response_text:
        return None
    answer_matches = _ANSWER_SCORE_RE.findall(response_text)
    if answer_matches:
        return float(answer_matches[-1])
    number_matches = _ANY_NUMBER_RE.findall(response_text)
    if number_matches:
        return float(number_matches[-1])
    return None

_IMAGE_ROOT = 'website-aesthetics-datasets/rating-based-dataset/images/'
_MAX_EXAMPLES = 5


def _resolve_image_path(name):
    """Return the on-disk path for a dataset image name.

    The '_resized' marker is removed and any leading '/' is dropped, so the
    same file always maps to the same path string.
    """
    return _IMAGE_ROOT + name.replace('_resized', '').lstrip('/')


def _select_examples(df, target_idx, target_path):
    """Pick up to ``_MAX_EXAMPLES`` scored example screenshots for one target.

    Candidates come first from ``SIMILARITY_DATA`` (entry keyed by
    ``str(target_idx)``) and then, if still short, from the other rows of
    ``df`` in random order. A candidate is skipped when its score is missing,
    its image cannot be read, or its path was already used (including the
    target image itself).

    Returns:
        ``(example_images, example_lines)``: parallel lists of
        ``(data_url, score)`` tuples and ``"Score: x.y"`` strings.
    """
    example_images, example_lines = [], []
    used_paths = {target_path}
    failures = 0
    last_error = None

    def add_example(name, score):
        # An example without a score carries no few-shot signal.
        if score is None:
            return
        path = _resolve_image_path(name)
        if path in used_paths:
            return
        img = cv2.imread(path)
        if img is None:
            return
        # Build everything that can fail before appending, so the two
        # parallel lists can never get out of sync.
        img_url = frame_to_data_url(img)
        line = f"Score: {score:.1f}"
        example_images.append((img_url, score))
        example_lines.append(line)
        used_paths.add(path)

    # 1. Similarity-based retrieval
    similar_list = SIMILARITY_DATA.get(str(target_idx), {}).get("similar_images", [])
    for sim in similar_list:
        if len(example_images) >= _MAX_EXAMPLES:
            break
        try:
            add_example(sim["image"], sim.get("mean_score"))
        except Exception as exc:
            # Best effort: a malformed candidate must not abort the sample.
            failures += 1
            last_error = exc

    # 2. Random fallback
    if len(example_images) < _MAX_EXAMPLES:
        other_indices = [j for j in range(len(df)) if j != target_idx]
        random.shuffle(other_indices)
        for idx in other_indices:
            if len(example_images) >= _MAX_EXAMPLES:
                break
            try:
                row = df.iloc[idx]
                add_example(row['image'], row['mean_score'])
            except Exception as exc:
                failures += 1
                last_error = exc

    if failures:
        # One summary line per sample instead of silently dropping errors.
        print(f"Sample {target_idx}: skipped {failures} example candidate(s) due to errors (last: {last_error})")
    return example_images, example_lines


def prepare_and_run_evaluation():
    """Score the selected slice of the test set and save the results as JSON.

    Reads the test CSV, and for every index from ``args.start`` to
    ``args.end`` (inclusive, clamped to the dataset; ``None`` means through
    the last row) builds a few-shot prompt, queries the model via
    ``verbalize`` and parses the score. Results are rewritten to
    ``results_gpt_nopersona_slice_<start>_<end>.json`` in ``args.output_dir``
    after every successful sample, so an interrupted run keeps its progress.
    A sample that raises is reported and skipped.
    """
    test_filename = "website-aesthetics-datasets/rating-based-dataset/preprocess/test_list.csv"
    df = pd.read_csv(test_filename)
    # Determine slice; clamping avoids attempting rows that do not exist.
    n_rows = len(df)
    first = max(args.start, 0)
    stop = n_rows if args.end is None else min(args.end + 1, n_rows)
    indices = list(range(first, stop))
    response_dict = []
    os.makedirs(args.output_dir, exist_ok=True)
    # Computed up front so it is defined even if no sample succeeds.
    output_filename = os.path.join(args.output_dir, f'results_gpt_nopersona_slice_{args.start}_{args.end if args.end is not None else "end"}.json')
    for i in tqdm(indices, desc="Processing Samples"):
        try:
            d = df.iloc[i]
            value = d.to_dict()
            image_path = _resolve_image_path(d['image'])
            image = cv2.imread(image_path)
            if image is None:
                print(f"Skipping sample {i}: could not read image {image_path}")
                continue
            image_url = frame_to_data_url(image)
            # Few-shot example selection
            example_images, example_lines = _select_examples(df, i, image_path)
            valid_examples = len(example_lines)
            # Add target image
            example_images.append((image_url, None))
            examples_text = "\n".join(example_lines)
            prompt = f"""Given the images below, the first {valid_examples} are example website screenshots with their likeability scores (on a 0-10 scale, see the list below). The last image is the one you should score.

Carefully analyze the last website screenshot and provide a score between 0 to 10 based on how much people would like the website's visual design, layout, colors, typography, and overall aesthetic appeal.
Here are {valid_examples} example likeability scores (in order):
{examples_text}
Please evaluate the final website screenshot and provide your assessment."""
            # Get prediction
            resp = verbalize(prompt, baseline_system_prompt, example_images)
            answer = extract_score_from_response(resp)
            # Store results
            value.update({
                "baseline_response": {"prediction": answer, "reason": resp},
                "no_persona_prediction": answer
            })
            response_dict.append(value)
            # Incremental save
            with open(output_filename, 'w') as f:
                json.dump(response_dict, f, indent=4)
        except Exception as e:
            print(f"Error on sample {i}: {e}")
            continue
    if response_dict:
        print(f"Finished. Results in {output_filename}")
    else:
        print("Finished. No samples were processed successfully; no results file was written.")
# Script entry point: run the evaluation only when executed directly.
if __name__ == "__main__":
    prepare_and_run_evaluation()